library(tidyverse)
library(haven)
library(glue)
library(MatchIt)
library(Matching)
library(MatchingFrontier)
library(AER)
library(Amelia)
library(lattice)



#############################
##FIGURE 7 IN PAPER########
#########################

setwd("C:/Users/Reed/Dropbox/Gov2001 Replication/Original replication download/usedata")
survey_data <- read_dta("survey_data.dta")
survey_data$mi_m <- survey_data$`_mi_m`

covars <- c("age", "edu", "logdist", "female", "householdfinance","vote_GD")
# use Match function; defaults will estimate the ATT with M=1 for 1-to-1 matches #
# `Weight` = type of weighting scheme used by matching algorithm for each covar #

varlist <- c(  "score_asylum", "asylumspec_kids" ,
               "asylumgen" ,
               "asylumspec_terrorism",
               "asylumspec_criminality" ,
               "asylumspec_burden" ,
               "score_immig", 
               "incrdecr_economicimmigr" ,
               "borders_bordercontrol" ,
               "diff_muslim", 
               "diff_x" ,
               "score_muslim" ,
               "incrdecr_minority" ,
               "diff_immig" ,
               "islam_acquainted", 
               "islam_extremism" ,
               "score_behavioral" ,
               "notifyMPs_rec",
               "donation_bin" ,
               "donation_self" ,
               "petition_rec")
            #   "score_all", # Can add these if interested in looking. Not included in original paper
            #   "diff_nat")


prune_seq <- seq(150, 1200, 150)
impute_seq <- seq(1, 5, 1)

#varlist <- c(  "score_asylum", "asylumspec_kids") #Test on a smaller list of outcomes

# Run loop from here, it will take about 20 minutes on a typical laptop
counter=1 # for checking progress
graph_data <- data.frame(outcome = NA, prune = NA, N=NA, b=NA, se=NA)
time = proc.time()

for(j in varlist) {
  se_data <- data.frame(mi_m = NA, prune=NA, N=NA, se = NA)
    b_data <- data.frame(mi_m = NA, prune=NA, N=NA, b = NA)
  for (m in impute_seq) {
data_imputed <- survey_data %>% filter(mi_m == m)
clean.data <- as.data.frame(na.omit(data_imputed[,c(covars,j, "treatment", "munid","weight2_trim")]))
frontier <- makeFrontier(dataset = clean.data, 
                         treatment = "treatment", 
                         outcome = j,
                         match.on = c("age", "edu","female","householdfinance",
                                      "logdist", "vote_GD" ),
                         keep.vars = c("munid","weight2_trim"))
for(p in prune_seq){
frontier.dataset <- generateDataset(frontier.object = frontier, N = 1046) #nrow(frontier$dataset) - p)
b.out<-NULL
se.out<-NULL
formula <- as.formula(glue("{j} ~ treatment + female + age + as.factor(edu) | 
                           logdist + female + age + as.factor(edu)"))
iv.out <- ivreg(data = frontier.dataset, formula, 
                  weights = weight2_trim)
cluster.se <- sqrt(diag(vcovCL(iv.out, cluster = frontier.dataset$munid, type = "HC0",
                               target = NULL, inverse_var = FALSE)))
b.out <- rbind(b.out, iv.out$coef[2])
se.out <- rbind(se.out, cluster.se[2])
se_data <- rbind(se_data, c(m,p,nrow(frontier$dataset) - p, se.out))
b_data <- rbind(b_data, c(m,p,nrow(frontier$dataset) - p, b.out))
print(counter / (length(prune_seq)*length(varlist)*length(impute_seq)))
counter = counter+1
}
  }
    b_data <- na.omit(b_data)
    b_data <- as.matrix(dcast(b_data, prune ~ mi_m, value.var="b"))
    
    se_data <- na.omit(se_data)
    se_data <- as.matrix(dcast(se_data, N +prune ~ mi_m, value.var="se"))

combined.results <- mi.meld(q = as.matrix(b_data[,2:6]), se = as.matrix(se_data[,3:7]), byrow = F)
results <- data.frame(outcome=j, prune=rev(prune_seq), N = (se_data[,1]),
                      b = t(combined.results$q.mi), se = t(combined.results$se.mi))
graph_data <- rbind(graph_data, results)
}
time <-time - proc.time()
graph_data <- na.omit(graph_data)

save(graph_data, file = "matching_heatmap.RData")


load("matching_heatmap.Rdata")

#Add the original results to the heatmap 
for ( j in varlist ) { 
  b.out<-NULL
  se.out<-NULL
  var_data <- data.frame(mi_m = NA, b = NA, se = NA)
  N = NULL
  for(i in 1:5) {
    reg.data <- survey_data %>% filter(mi_m == i) 
    iv.out <- ivreg(data = reg.data, reg.data[[j]] ~ treatment + female + age + as.factor(edu)| 
                      logdist + female + age + as.factor(edu), weights = weight2_trim)
    cluster.se <- sqrt(diag(vcovCL(iv.out, cluster = iv.out$model$munid, type = "HC0",
                                   target = NULL, inverse_var = FALSE)))
    b.out <- rbind(b.out, iv.out$coef)
    se.out <- rbind(se.out, cluster.se)
    N <- nrow(iv.out$model)
  }
combined.results <- mi.meld(q = b.out, se = se.out)
graph_data <- rbind(graph_data, c(j, 0, N, combined.results$q.mi[2], combined.results$se.mi[2]))
}

#Format data for color-coding

graph_data$positive <- ifelse(graph_data$b>0, 1, -1)
graph_data$sig <- ifelse(abs(as.numeric(graph_data$b))-(1.96*as.numeric(graph_data$se)) > 0, 2, 1)
graph_data$sig <- ifelse(abs(as.numeric(graph_data$b))-(2.576*as.numeric(graph_data$se)) > 0, 3, graph_data$sig)
graph_data$code <- as.numeric(graph_data$positive*graph_data$sig)
graph_data$prune <- as.numeric(graph_data$prune)
graph_data$outcome <- factor(graph_data$outcome, levels=rev(varlist))

graph_data_backup <- graph_data
save(graph_data_backup, file = "matching_heatmap_backup.RData")

# make heatmap
library(lattice)
labs <- rev(c( "Asylum-seeker component (SD=1)",
               "Ban from schools (1-5)" ,
               "Fewer asylum-seekers (1-5)" ,
               "More terror attacks (1-5)",
               "More crimes (1-5)" ,
               "Are a burden (1-5)" ,
               "Immigrant component (SD=1)", 
               "Fewer economic migrants (1-5)" ,
               "Increase border protection (1-5)" ,
               "T(Muslim-Muslim immi.) (1-5)", 
               "T(Christ.-Christ. immi.) (1-5)" ,
               "Muslim Component (SD=1)" ,
               "Decrease representation (1-5)" ,
               "T(Christ. immi.-Muslim immi.) (1-5)" ,
               "How many do not integrate (1-5)",
               "How many support extremists (1-5)" ,
               "Behavioral component (SD=1)" ,
               "Notify MP? (-2,2)",
               "Donate? (0,1)" ,
               "(100-donation)/100 (0-1)" ,
               "Sign petition? (0,1)"))
              # "Total Component (SD=1)", 
              # "T(Christ. - Muslim) (1-5)"))

load("matching_heatmap_backup.RData")

matching_heatmap <- levelplot(graph_data$code ~ graph_data$prune * graph_data$outcome,
          #at=c(-2.5,-2.01,-1.01,0.99,1.99, 2.49), 
          col.regions = colorRampPalette(
            c("#ddddff","#ffdddd", "red", "darkred")),
          colorkey = list(
            labels = list(labels = c("Pos.**","Pos.*","Pos.", "Neg."),
                          at = c(2.5, 1.5, 0.5,-.5)),
            at = c(3, 2, 1,0,-1)
          ),
          xlab = "Observations Pruned", ylab = "Outcomes",
          scales=list(x=list(at=unique(graph_data$prune), 
                             labels=unique(graph_data$prune)),
                      y=list(at=seq(1,23,1), labels=labs))
          )
trellis.device(device="pdf", file="matching_heatmap_3.pdf",
               height = (6), width = (10)
               )
print(matching_heatmap)
dev.off()

# See how beta coefficients vary as well...
levelplot(graph_data$b ~ graph_data$prune * graph_data$outcome, 
          col.regions = colorRampPalette(
            c("white", "red", "darkred")),
          at = c(-.1, 0, 0.1, 0.2, 0.3, 0.4, 0.5),
          xlab = "Observations Pruned", ylab = "Outcomes",
          scales=list(x=list(at=unique(graph_data$prune), 
                             labels=unique(graph_data$prune)),
                      y=list(at=seq(1,23,1), labels=labs)))
#They tend to vary some, but seldom more than about 0.1 









